***************************************
*Survey of Past & Present Participants
***************************************
*Turn off output paging and empty the data in memory, then load the survey
*workbook; the first spreadsheet row supplies the variable names
set more off
clear

import excel using "PS_Livny_2023_Survey.xlsx", firstrow locale("UTF-8")

**ADDITION OF CURRICULUM**
*Did coursework support skills in other courses?
tabulate course_courses

*Which specific skills were gained? One frequency table per skill item
local coursework_skills skills_reading skills_literature skills_strategic ///
    skills_litreview skills_question skills_theory skills_conceptualization ///
    skills_measurement skills_data_find skills_data_evaluate skills_data_develop ///
    skills_data_analysis skills_results skills_writing
foreach skill of varlist `coursework_skills' {
    tabulate `skill'
}

*Thesis-support items: thesis_course first, then thesis_apprenticeship for comparison
foreach item of varlist thesis_course thesis_apprenticeship {
    tabulate `item'
}

*Same two items, restricted to respondents whose thesis_change is
*"Decided to write after program" (those not initially planning on a thesis)
foreach item of varlist thesis_course thesis_apprenticeship {
    tabulate `item' if thesis_change=="Decided to write after program"
}

**MULTI-SEMESTER PROGRAM**
*Cross-tabulate each outcome (thesis2, postgrad_research, ra) against semesters2
*with row percentages, keeping only graduation_interval=="Full program, completed"
foreach outcome of varlist thesis2 postgrad_research ra {
    tabulate `outcome' semesters2 if graduation_interval=="Full program, completed", row
}

**SUPPORTING A LARGER TEAM**
*Communication skills with supervisors vs. peers and junior team-members
local comm_skills skills_written_peers skills_written_juniors skills_written_seniors ///
    skills_verbal_peers skills_verbal_juniors skills_verbal_seniors
foreach skill of varlist `comm_skills' {
    tabulate `skill'
}

*Diversity of team-members
local diversity_items gender_male race ethnicity lgbtq firstgen
foreach item of varlist `diversity_items' {
    tabulate `item'
}

**DESCRIPTIVE STATISTICS**
**Online Appendix A2.2**

*Graduation Year
*Density histogram with one bar per year starting at 2016; grey bars on a light
*grey plot region with white grid lines
*Label/tick/grid styling shared by both axes
local axis_style labsize(medium) labcolor(black) tlength(1) tlcolor(black) ///
    tlwidth(vthin) angle(horiz) grid glwidth(medium) glcolor(white)
histogram graduation, discrete start(2016) ///
    bcolor(gs8) barwidth(.8) legend(off) ///
    ylabel(0 .1 .2, `axis_style' format(%12.1f)) ///
    xlabel(2016(1)2025, `axis_style') ///
    xtitle("Graduation Year", size(medlarge) margin(small)) ///
    ytitle("Density", size(medlarge) margin(small)) ///
    plotregion(fcolor(gs14) lcolor(none) margin(b=0 t=3 r=10 l=1)) ///
    graphregion(fcolor(white) lcolor(none)) ///
    xscale(lcolor(none)) yscale(lcolor(none)) ///
    yline(.05 .15, lcolor(white) lwidth(vthin)) ///
    xsize(7) ysize(3)

*Major
foreach major_item of varlist major_PS major_other {
    tabulate `major_item'
}

*Semesters in program
*Tabulate the raw answers first, then recode the top category "6 or More" to "6"
*so the variable can be converted to numeric for the histogram
tabulate semesters
replace semesters = "6" if semesters=="6 or More"
destring semesters, replace

*Label/tick/grid styling shared by both axes
local axis_style labsize(medium) labcolor(black) tlength(1) tlcolor(black) ///
    tlwidth(vthin) angle(horiz) grid glwidth(medium) glcolor(white)
*The tick at 6 carries the text "6 or More" to reflect the recode above
histogram semesters, discrete start(1) xscale(range(1 6)) ///
    bcolor(gs8) barwidth(.8) legend(off) ///
    ylabel(0 .1 .2 .3, `axis_style' format(%12.1f)) ///
    xlabel(1 2 3 4 5 6 "6 or More", `axis_style') ///
    xtitle("Semesters in Program", size(medlarge) margin(small)) ///
    ytitle("Density", size(medlarge) margin(small)) ///
    plotregion(fcolor(gs14) lcolor(none) margin(b=0 t=3 r=12 l=1)) ///
    graphregion(fcolor(white) lcolor(none)) ///
    xscale(lcolor(none)) yscale(lcolor(none)) ///
    yline(.05 .15 .25, lcolor(white) lwidth(vthin)) ///
    xsize(7) ysize(3)

*Pre-participation variables
local pre_items pre_knowledge pre_interest pre_honors pre_postgrad
foreach item of varlist `pre_items' {
    tabulate `item'
}

*Evaluation of apprenticeship
local apprentice_items apprentice_enjoy apprentice_research apprentice_skills
foreach item of varlist `apprentice_items' {
    tabulate `item'
}

*Skills from apprenticeship
*Horizontal bar chart of the percent of respondents in response category 2
*("Gained") for each skill item. The data are reshaped destructively, so the
*work happens between preserve and restore.
preserve
local app_skills skills_written_peers skills_written_juniors skills_written_seniors ///
    skills_verbal_peers skills_verbal_juniors skills_verbal_seniors ///
    skills_organization skills_punctuality

*One indicator variable per observed response category: <skill>1, <skill>2, ...
foreach skill of varlist `app_skills' {
    tabulate `skill', generate(`skill')
}
*skills_organization is special-cased: its first indicator is moved to suffix 2
*and a suffix-1 indicator is created by hand
*NOTE(review): presumably only one response category occurs for this item, so
*tabulate creates no suffix-2 indicator - confirm against the data
rename skills_organization1 skills_organization2
generate skills_organization1 = 1 if skills_organization2==0

*Collapse to a single row of means, then go long: one row per response category
generate counter = 1
collapse (mean) skills_*1 skills_*2, by(counter)
reshape long `app_skills', i(counter) j(response)
label define response 2 "Gained" 1 "Didn't Gain", modify
label values response response
replace skills_organization = 0 if skills_organization==.

*Stack every (skill, response) pair into one long pair of columns;
*_stack numbers the skills in list order
local stack_pairs
foreach skill of local app_skills {
    local stack_pairs `stack_pairs' `skill' response
}
stack `stack_pairs', into(skills response)
*Negate the stack index so the first skill is plotted at the top of the y axis
replace _stack = -_stack
label define _stack -1 "Written: Peers" -2 "Written: Juniors" -3 "Written: Seniors" ///
    -4 "Verbal: Peers" -5 "Verbal: Juniors" -6 "Verbal: Seniors" ///
    -7 "Organization" -8 "Punctuality", modify
label values _stack _stack
label define response 2 "Gained" 1 "Didn't Gain", modify
label values response response
*Proportion -> percent
replace skills = skills*100

*Label/tick/grid styling shared by both axes
local axis_style labsize(medium) labcolor(black) tlength(1) tlcolor(black) ///
    tlwidth(vthin) angle(horiz) grid glwidth(medium) glcolor(white)
twoway (bar skills _stack if response==2, horiz bcolor(gs8) barwidth(.8)), ///
    legend(off) ///
    ylabel(#8, valuelabel `axis_style' format(%12.1f)) ///
    xlabel(0(25)100, `axis_style') ///
    xtitle("Percent Gained", size(medlarge) margin(small)) ytitle("") ///
    plotregion(fcolor(gs14) lcolor(none) margin(b=1 t=1 r=0 l=0)) ///
    graphregion(fcolor(white) lcolor(none)) ///
    xscale(lcolor(none)) yscale(lcolor(none)) ///
    xline(12.5 37.5 62.5 87.5, lcolor(white) lwidth(vthin)) ///
    xsize(7) ysize(3)
restore

*Evaluation of coursework
local course_items course_purpose course_steps course_apply course_develop ///
    course_apprenticeship course_courses
foreach item of varlist `course_items' {
    tabulate `item'
}

*Skills from coursework
*Horizontal bar chart of the percent of respondents in response category 2
*("Gained") for each coursework skill item. The data are reshaped destructively,
*so the work happens between preserve and restore.
preserve
local course_skills skills_reading skills_literature skills_strategic skills_litreview ///
    skills_question skills_theory skills_conceptualization skills_measurement ///
    skills_data_find skills_data_evaluate skills_data_develop skills_data_analysis ///
    skills_results skills_writing

*One indicator variable per observed response category: <skill>1, <skill>2, ...
foreach skill of varlist `course_skills' {
    tabulate `skill', generate(`skill')
}
*skills_data_find is special-cased: its first indicator is moved to suffix 2
*and a suffix-1 indicator is created by hand
*NOTE(review): presumably only one response category occurs for this item, so
*tabulate creates no suffix-2 indicator - confirm against the data
rename skills_data_find1 skills_data_find2
generate skills_data_find1 = 1 if skills_data_find2==0

*Collapse to a single row of means, then go long: one row per response category
generate counter = 1
collapse (mean) skills_*1 skills_*2, by(counter)
reshape long `course_skills', i(counter) j(response)
label define response 2 "Gained" 1 "Didn't Gain", modify
label values response response
replace skills_data_find = 0 if skills_data_find==.

*Stack every (skill, response) pair into one long pair of columns;
*_stack numbers the skills in list order
local stack_pairs
foreach skill of local course_skills {
    local stack_pairs `stack_pairs' `skill' response
}
stack `stack_pairs', into(skills response)
*Negate the stack index so the first skill is plotted at the top of the y axis
replace _stack = -_stack
label define _stack -1 "Reading Research" -2 "Identifying Literature" ///
    -3 "Strategic Reading" -4 "Literature Review" -5 "Identifying Question" ///
    -6 "Developing Theory" -7 "Defining Variables" -8 "Measurement" ///
    -9 "Finding Datasets" -10 "Evaluating Datasets" -11 "Developing Datasets" ///
    -12 "Data Analysis" -13 "Discussing Results" -14 "Writing Paper", modify
label values _stack _stack
label define response 2 "Gained" 1 "Didn't Gain", modify
label values response response
*Proportion -> percent
replace skills = skills*100

*Label/tick/grid styling shared by both axes
local axis_style labsize(medium) labcolor(black) tlength(1) tlcolor(black) ///
    tlwidth(vthin) angle(horiz) grid glwidth(medium) glcolor(white)
twoway (bar skills _stack if response==2, horiz bcolor(gs8) barwidth(.8)), ///
    legend(off) ///
    ylabel(#14, valuelabel `axis_style' format(%12.1f)) ///
    xlabel(0(25)100, `axis_style') ///
    xtitle("Percent Gained", size(medlarge) margin(small)) ytitle("") ///
    plotregion(fcolor(gs14) lcolor(none) margin(b=1 t=1 r=0 l=0)) ///
    graphregion(fcolor(white) lcolor(none)) ///
    xscale(lcolor(none)) yscale(lcolor(none)) ///
    xline(12.5 37.5 62.5 87.5, lcolor(white) lwidth(vthin)) ///
    xsize(7) ysize(4)
restore

*Thesis/capstone project
local thesis_items thesis thesis_apprenticeship thesis_course
foreach item of varlist `thesis_items' {
    tabulate `item'
}

*Influence of program on those who wrote/will write a thesis
tabulate thesis_influence if inlist(thesis, "Planning to write", "Wrote/Am writing")

*Influence of program on those who did/will not write a thesis
tabulate thesis_influence if thesis=="Didn't/Won't write"

*Work as RA
local ra_items ra_pi ra_another ra_paid ra_credit ra_unpaid ra_apprenticeship ra_course
foreach item of varlist `ra_items' {
    tabulate `item'
}

*Postgraduate plans
foreach item of varlist postgrad postgrad_research {
    tabulate `item'
}

*Demographics
*Detailed summary (percentiles etc.) for age, frequency tables for the rest
summarize age, detail
local demo_items gender lgbtq race ethnicity firstgen disability veteran
foreach item of varlist `demo_items' {
    tabulate `item'
}

*******************************************
*Participant & Non-Participant Performance
*******************************************
*Empty the data in memory before loading the second workbook; import excel
*refuses to overwrite unsaved data that is still in memory
clear
set more off

*Load the matched-students workbook; the first spreadsheet row supplies the
*variable names
import excel using "PS_Livny_2023_MatchedStudents.xlsx", firstrow locale("UTF-8")

**DESCRIPTIVE STATISTICS**
**Online Appendix A3.2**

*Number of participants
tabulate participant

*Number of semesters completed, restricted to matriculation>=2018
*NOTE(review): the original note describes this as "among those not currently
*in program" - confirm the matriculation filter matches that intent
summarize semesters if matriculation>=2018, detail

*Demographics: row percentages only, by participant status
foreach trait of varlist male race transfer {
    tabulate participant `trait', row nofreq
}

*Student achievement: summary statistics separately for each participant group
foreach measure of varlist gpa_final advanced_firstyear advanced_final {
    bysort participant: summarize `measure'
}
tabulate participant thesis, row nofreq

**ESTIMATED EFFECT OF PARTICIPATION**
**Online Appendix A3.3**

*Recode the string indicators as 0/1; any value other than the two listed
*(including blank) is left missing
gen participant_dummy=0 if participant=="No"
replace participant_dummy=1 if participant=="Yes"
gen transfer_dummy=0 if transfer=="Non-transfer"
replace transfer_dummy=1 if transfer=="Transfer student"
gen thesis_dummy=0 if thesis=="No"
replace thesis_dummy=1 if thesis=="Yes"
gen male_dummy=0 if male=="Female, Non-Binary"
replace male_dummy=1 if male=="Male"

*Propensity-score matching: participant_dummy is the treatment and the same
*covariates enter the treatment model for every outcome. The covariate list is
*defined once, with full variable names, so the three models cannot drift apart
*and do not depend on variable abbreviation (set varabbrev) being enabled.
*NOTE(review): assumes the dataset has no separate variable named exactly
*advanced_first; confirm against the workbook
local match_covars advanced_firstyear matriculation graduation transfer_dummy ///
    male_dummy white gpa_firstyear
foreach outcome of varlist gpa_final advanced_final thesis_dummy {
    teffects psmatch (`outcome') (participant_dummy `match_covars')
}

**ESTIMATED EFFECT OF MULTIPLE_SEMESTERS**
**Online Appendix A3.3**

*semesters_dummy: 1 when semesters>1, 0 for other nonzero values, and missing
*when semesters is 0 or system-missing (those rows drop out of the t-tests)
generate semesters_dummy = semesters>1 if semesters~=0 & semesters~=.

*Two-sample t-tests of each outcome across the two semesters_dummy groups
foreach outcome of varlist gpa_final advanced_final thesis_dummy {
    ttest `outcome', by(semesters_dummy)
}
